home *** CD-ROM | disk | FTP | other *** search
Text File | 1992-05-06 | 6.0 KB | 242 lines | [TEXT/PJMM] |
- unit Levenshtein;
-
- {Compute the Levenshtein distance between a pair of strings.}
- {Adapted from C code presented in "Finding String Distances",}
- {Ray Valdés, Dr. Dobb’s Journal, April 1992, ppg. 56— 62, 107.}
- {Algorithm due to V.I. Levenshtein, as presented in "Time Warps,}
- {String Edits, and MacroMolecules: The Theory and Practice of}
- {Sequence Comparison", Sankoff and Kruskal, eds., Addison–Wesley,}
- {1983. Macintosh implementation for THINK Pascal by D.B.Lamkins.}
-
- interface
-
- type
- Opcode = (match, insert, delete, substitute);
- LevOp = record
- iA, iB: Integer;
- op: Opcode;
- end;
- LevOps = array[1..255] of LevOp;
- LevOpsPtr = ^LevOps;
- LevOpsHdl = ^LevOpsPtr;
-
- {Call InitLevDist to establish the costs of the four edit operations.}
- procedure InitLevDist (matchCost, insertCost, deleteCost, substituteCost: Integer);
-
- {LevDist returns the Levenshtein distance between the given strings. When non–nil,}
- {theOps is resized and filled in with the edit sequence (as defined in the article) and}
- {moves is the number of edits.}
- function LevDist (a, b: Str255; theOps: LevOpsHdl; var moves: Integer): Integer;
-
- implementation
-
- type
- MatrixCell = record
- distance: Integer;
- op: Opcode;
- end;
- MatrixCellPtr = ^MatrixCell;
- MatrixCellHdl = ^MatrixCellPtr;
- Move = record
- dRow, dCol: Integer;
- end;
-
- var
- theMatrix: MatrixCellHdl;
- theCost: array[Opcode] of Integer;
- theMoves: array[Opcode] of Move;
-
- procedure InitLevDist (matchCost, insertCost, deleteCost, substituteCost: Integer);
- begin
- theMatrix := nil;
- theCost[match] := matchCost;
- theCost[insert] := insertCost;
- theCost[delete] := deleteCost;
- theCost[substitute] := substituteCost;
- with theMoves[match] do
- begin
- dRow := -1;
- dCol := -1;
- end;
- with theMoves[insert] do
- begin
- dRow := 0;
- dCol := -1;
- end;
- with theMoves[delete] do
- begin
- dRow := -1;
- dCol := 0;
- end;
- with theMoves[substitute] do
- begin
- dRow := -1;
- dCol := -1;
- end;
- end;
-
- function LevDist (a, b: Str255; theOps: LevOpsHdl; var moves: Integer): Integer;
- var
- numRows, numCols: Integer;
-
- procedure InitializeMatrix;
- var
- i: Integer;
- p: MatrixCellPtr;
- begin
- with theMatrix^^ do
- begin
- distance := 0;
- op := delete;
- end;
- p := theMatrix^;
- for i := 1 to numCols - 1 do
- begin
- p := MatrixCellPtr(ORD(p) + SIZEOF(MatrixCell));
- with p^ do
- begin
- distance := i;
- op := insert;
- end;
- end;
- p := theMatrix^;
- for i := 1 to numRows - 1 do
- begin
- p := MatrixCellPtr(ORD(p) + SIZEOF(MatrixCell) * numCols);
- with p^ do
- begin
- distance := i;
- op := delete;
- end;
- end;
- end;
-
- procedure CalculateMatrix;
- var
- pC, pN, pW, pNW: MatrixCellPtr;
-
- procedure AdvancePtrs;
- begin
- pC := MatrixCellPtr(ORD(pC) + SIZEOF(MatrixCell));
- pN := MatrixCellPtr(ORD(pN) + SIZEOF(MatrixCell));
- pW := MatrixCellPtr(ORD(pW) + SIZEOF(MatrixCell));
- pNW := MatrixCellPtr(ORD(pNW) + SIZEOF(MatrixCell));
- end;
-
- var
- row, col: Integer;
-
- procedure CalculateCell;
- begin
- if pW^.distance < pN^.distance then
- if pW^.distance < pNW^.distance then
- begin
- pC^.op := insert;
- pC^.distance := pW^.distance + theCost[insert];
- end
- else if a[row] = b[col] then
- begin
- pC^.op := match;
- pC^.distance := pNW^.distance + theCost[match];
- end
- else
- begin
- pC^.op := substitute;
- pC^.distance := pNW^.distance + theCost[substitute];
- end
- else if pN^.distance < pNW^.distance then
- begin
- pC^.op := delete;
- pC^.distance := pN^.distance + theCost[delete];
- end
- else if a[row] = b[col] then
- begin
- pC^.op := match;
- pC^.distance := pNW^.distance + theCost[match];
- end
- else
- begin
- pC^.op := substitute;
- pC^.distance := pNW^.distance + theCost[substitute];
- end;
- end;
-
- begin
- pC := MatrixCellPtr(ORD(theMatrix^) + SIZEOF(MatrixCell) * (numCols + 1));
- pN := MatrixCellPtr(ORD(theMatrix^) + SIZEOF(MatrixCell) * (0 + 1));
- pW := MatrixCellPtr(ORD(theMatrix^) + SIZEOF(MatrixCell) * (numCols + 0));
- pNW := MatrixCellPtr(ORD(theMatrix^) + SIZEOF(MatrixCell) * (0 + 0));
- for row := 1 to numRows - 1 do
- begin
- for col := 1 to numCols - 1 do
- begin
- CalculateCell;
- AdvancePtrs;
- end;
- AdvancePtrs;
- end;
- end;
-
- procedure BacktrackMatrix;
- var
- pC: MatrixCellPtr;
- theDistance, index, row, col, deltaRow, deltaCol: Integer;
- whichOp: Opcode;
- begin
- pC := MatrixCellPtr(ORD(theMatrix^) + SIZEOF(MatrixCell) * (numRows * numCols - 1));
- theDistance := pC^.distance;
- if theOps <> nil then
- begin
- SetHandleSize(Handle(theOps), (numRows + numCols) * SIZEOF(LevOp));
- index := 0;
- row := numRows - 1;
- col := numCols - 1;
- while (row > 0) | (col > 0) do
- begin
- whichOp := pC^.op;
- if whichOp <> match then
- begin
- index := index + 1;
- with theOps^^[index] do
- begin
- iA := row;
- iB := col;
- op := whichOp;
- end;
- end;
- with theMoves[whichOp] do
- begin
- deltaRow := dRow;
- deltaCol := dCol;
- end;
- pC := MatrixCellPtr(ORD(pC) + (deltaRow * numCols + deltaCol) * SIZEOF(MatrixCell));
- row := row + deltaRow;
- col := col + deltaCol;
- end;
- end;
- SetHandleSize(Handle(theOps), index * SIZEOF(LevOp));
- moves := index;
- LevDist := theDistance;
- end;
-
- var
- sizeNeeded: Size;
-
- begin {LevDist}
- numRows := length(a) + 1;
- numCols := length(b) + 1;
- if (theMatrix = nil) | (theMatrix^ = nil) then
- theMatrix := MatrixCellHdl(NewHandle(0));
- HNoPurge(Handle(theMatrix));
- sizeNeeded := Size(SIZEOF(MatrixCell)) * numRows * numCols;
- if sizeNeeded > GetHandleSize(Handle(theMatrix)) then
- SetHandleSize(Handle(theMatrix), SIZEOF(MatrixCell) * numRows * numCols);
- HLock(Handle(theMatrix));
- InitializeMatrix;
- CalculateMatrix;
- BacktrackMatrix;
- HUnlock(Handle(theMatrix));
- HPurge(Handle(theMatrix));
- end;
-
- end.